import numpy as np
import pandas as pd
from pandas import DataFrame
from datetime import datetime
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
%matplotlib inline
!ls
# Load the three time-series files (available data up to 2/22/2020).
# Every missing cell is replaced by 0.
confirmed = pd.read_csv('time_series_2019-ncov-Confirmed.csv').fillna(0)
death = pd.read_csv('time_series_2019-ncov-Deaths.csv').fillna(0)
recovered = pd.read_csv('time_series_2019-ncov-Recovered.csv').fillna(0)

# Identifier columns used when melting; all remaining columns become 'Date'.
id_columns = ["Province/State", "Country/Region", 'Lat', 'Long']

# Convert the column names from the fifth column onwards to datetime.date.
confirmedtimeframe = confirmed.iloc[:, 4:]
confirmedtimeframe.columns = pd.to_datetime(confirmedtimeframe.columns).date
deathtimeframe = death.iloc[:, 4:]
deathtimeframe.columns = pd.to_datetime(deathtimeframe.columns).date
recoveredtimeframe = recovered.iloc[:, 4:]
recoveredtimeframe.columns = pd.to_datetime(recoveredtimeframe.columns).date

# Put the first four columns back in front of the re-labelled date columns.
confirmed = pd.concat([confirmed.iloc[:, :4], confirmedtimeframe], axis=1, sort=True)
death = pd.concat([death.iloc[:, :4], deathtimeframe], axis=1, sort=True)
recovered = pd.concat([recovered.iloc[:, :4], recoveredtimeframe], axis=1, sort=True)

# Melt the individual date columns into rows: one 'Date' column plus one
# value column per frame.
confirmed = confirmed.melt(id_vars=id_columns, var_name="Date", value_name="Confirmed")
death = death.melt(id_vars=id_columns, var_name="Date", value_name="Death")
recovered = recovered.melt(id_vars=id_columns, var_name="Date", value_name="Recovered")

# Consolidated dataframe that is ready to be used: the melted confirmed frame
# plus the value columns of the other two frames, placed side by side.
# NOTE(review): rows are matched by row label only, so this presumes the three
# files list the same locations in the same order - confirm.
data = pd.concat([confirmed, death['Death'], recovered['Recovered']], axis=1, sort=True)
# All provinces and states affected (bare expression: its value is only shown
# when run interactively).
pd.unique(confirmed['Province/State'])
# All countries and regions affected.
pd.unique(confirmed['Country/Region'])
# Separate the China region from all other regions worldwide.
china_region_names = ['Mainland China', 'Taiwan', 'Hong Kong', 'Macau']
# Build the membership mask once and reuse it for both halves of the split.
is_china = data['Country/Region'].isin(china_region_names)
# .copy() makes chinaregion an independent frame, so assigning columns to it
# later does not trigger pandas' SettingWithCopyWarning.
chinaregion = data.loc[is_china, :].copy()
# Row labels of the China-region rows (name kept for backward compatibility).
ind = chinaregion.index
# Take the complement with the mask instead of data.drop(data.index[ind]),
# which used row labels as positions and was only correct for a default
# RangeIndex.
nonchinaregion = data.loc[~is_china, :].reset_index(drop=True)
# Pivot both halves into tables indexed by (Country/Region, Province/State,
# Date) holding the mean of each count column.
# aggfunc is given as the string 'mean': passing the np.mean callable is
# deprecated in recent pandas and emits a FutureWarning.
pivot_index = ["Country/Region", "Province/State", 'Date']
pivot_values = ['Confirmed', 'Recovered', 'Death']
chinaregionpivot = pd.pivot_table(chinaregion, index=pivot_index,
                                  values=pivot_values,
                                  aggfunc='mean')
nonchinaregionpivot = pd.pivot_table(nonchinaregion, index=pivot_index,
                                     values=pivot_values,
                                     aggfunc='mean')
def getlocationdata(region, country, state):
    """Extract the timeframe table of one location.

    Args:
        region: Pivot table to read from (chinaregionpivot or
            nonchinaregionpivot).
        country: Value of the Country/Region index level.
        state: Value of the Province/State index level.

    Returns:
        The rows of ``region`` selected by the ``(country, state)`` key,
        with all columns.
    """
    location_key = (country, state)
    return region.loc[location_key, :]
# Insert region, country and state values for the location in question and
# name the resulting timeframe table after it.
Hubei = getlocationdata(chinaregionpivot, 'Mainland China', 'Hubei')

# First figure: confirmed counts.
plt.figure(figsize=(16, 8))
hubei_confirmed = Hubei['Confirmed'].sort_index()
hubei_confirmed.plot.line(color='b', marker='o', linestyle='-.')
plt.ylabel('Number of Patient')
plt.title('Confirmed in Hubei')
plt.legend()

# Second figure: deaths and recoveries drawn into the same figure.
plt.figure(figsize=(16, 8))
for column_name, line_color, line_marker in (('Death', 'r', 'x'), ('Recovered', 'g', 'o')):
    Hubei[column_name].sort_index().plot.line(color=line_color, marker=line_marker, linestyle='--')
plt.ylabel('Number of Patient')
plt.title('Death and Recovery in Hubei')
plt.legend()
# Get the latest data: the per-group maximum of every column is taken.
# NOTE(review): using max as "latest" presumes the counts never decrease over
# time - confirm against the data source.
# The columns are selected with a list; indexing a groupby with a bare tuple
# of names ([... 'Lat','Long', ...]) is no longer accepted by pandas 2.x.
latest_columns = ['Lat', 'Long', 'Confirmed', 'Recovered', 'Death']

chinalatest = (
    chinaregion.groupby(['Province/State'])[latest_columns]
    .agg('max')
    .sort_values(by=['Confirmed'], ascending=False)
    .reset_index()
)

# Confirmed counts for every province/state of the China region.
fig = px.bar(chinalatest, x='Province/State', y='Confirmed', color='Province/State',
             color_discrete_sequence=px.colors.qualitative.Set3,
             hover_data=['Confirmed'], title='Confirmed in China Region')
fig.show()

# Same chart without Hubei.
fig = px.bar(chinalatest.loc[chinalatest['Province/State'] != 'Hubei', :],
             x='Province/State', y='Confirmed', color='Province/State',
             color_discrete_sequence=px.colors.qualitative.Set3,
             hover_data=['Confirmed'], title='Confirmed in China Region Excluding Hubei')
fig.show()

nonchinalatest = (
    nonchinaregion.groupby(['Country/Region'])[latest_columns]
    .agg('max')
    .sort_values(by='Confirmed', ascending=False)
    .reset_index()
)

# Confirmed counts outside China; the 'Others' entry is left out of the chart.
fig = px.bar(nonchinalatest.loc[nonchinalatest['Country/Region'] != 'Others', :],
             x='Country/Region', y='Confirmed', color='Country/Region',
             color_discrete_sequence=px.colors.qualitative.Set3,
             hover_data=['Confirmed'], title='Confirmed outside China')
fig.show()
# Add death and recovery rates (each count divided by the confirmed count)
# to both summary tables.
for latest_table in (chinalatest, nonchinalatest):
    confirmed_count = latest_table['Confirmed']
    latest_table['Death Rate'] = latest_table['Death'] / confirmed_count
    latest_table['Recover Rate'] = latest_table['Recovered'] / confirmed_count
# Options shared by both scatter charts: deaths against confirmed counts,
# with the marker size taken from the recovery rate.
scatter_options = dict(x="Confirmed", y="Death", size='Recover Rate',
                       color_discrete_sequence=px.colors.qualitative.Plotly,
                       size_max=60)

# Death v.s. Confirmed in other part of China (Hubei excluded)
china_without_hubei = chinalatest[chinalatest['Province/State'] != 'Hubei']
fig = px.scatter(china_without_hubei, color="Province/State",
                 title='Deaths in Confirmed in Other Part of China',
                 **scatter_options)
fig.show()

# Death v.s. Confirmed outside China
fig = px.scatter(nonchinalatest, color="Country/Region",
                 title='Deaths in Confirmed outside China',
                 **scatter_options)
fig.show()
def getincremental(region, country, state):
    """Return the row-to-row change of each count for one location.

    Args:
        region: Pivot table to read from (chinaregionpivot or
            nonchinaregionpivot).
        country: Country/Region value of the location.
        state: Province/State value of the location (enter 0 if it shows '0').

    Returns:
        DataFrame with one row per pair of consecutive rows in the location's
        table. Columns: 'Incremental Confirmed', 'Incremental Death',
        'Incremental Recovered' (later row minus earlier row) and 'Day'
        (1, 2, ...).
    """
    # Look the location up once instead of six times per row.
    location = getlocationdata(region, country, state)

    # diff() computes row[i] - row[i-1] by position, so it does not rely on
    # integer keys being treated as positions on a date-indexed Series.
    # The first row has no predecessor and is dropped.
    changes = location[['Confirmed', 'Death', 'Recovered']].diff().iloc[1:]

    incremental = pd.DataFrame(data={
        'Incremental Confirmed': changes['Confirmed'].to_numpy(),
        'Incremental Death': changes['Death'].to_numpy(),
        'Incremental Recovered': changes['Recovered'].to_numpy(),
    })
    incremental['Day'] = np.arange(1, len(incremental) + 1, 1)
    return incremental
# Specify the region you want to explore
region = chinaregionpivot
country = 'Mainland China'
state = 'Hubei'
incremental = getincremental(region, country, state)
incremental

# Plot trend of incremental confirms
confirmed_trace = go.Scatter(
    x=incremental['Day'],
    y=incremental['Incremental Confirmed'],
    name='Confirmed',
    mode='lines+markers',
    line=dict(color='firebrick', width=2, dash='dash'),
)
fig = go.Figure()
fig.add_trace(confirmed_trace)

# The title names the state, or the country when state is 0.
title_place = state if state != 0 else country
fig.update_layout(title='Incremental Confirms in ' + title_place,
                  xaxis_title='Days',
                  yaxis_title='Confirms')
fig.show()
# Plot trend of incremental deaths and recoveries in one figure.
fig = go.Figure()
for column_name, trace_name, trace_color in (
    ('Incremental Death', 'Death', 'firebrick'),
    ('Incremental Recovered', 'Recovered', 'royalblue'),
):
    fig.add_trace(go.Scatter(x=incremental['Day'], y=incremental[column_name],
                             name=trace_name, mode='lines+markers',
                             line=dict(color=trace_color, width=2, dash='dot')))

# The title names the state, or the country when state is 0.
title_place = state if state != 0 else country
fig.update_layout(title='Incremental Death and Recovery in ' + title_place,
                  xaxis_title='Days',
                  yaxis_title='Number of Patients')
fig.show()
# Replace the 'Date' column of both frames with short text labels in
# '%b.%d' format; the geo animations use this column as their frame key.
for region_frame in (chinaregion, nonchinaregion):
    region_frame['Date'] = pd.to_datetime(region_frame['Date']).dt.strftime('%b.%d')
# Options shared by the geo animations: markers are placed by Lat/Long,
# coloured and sized by 'Confirmed', with one animation frame per 'Date'.
geo_options = dict(lat="Lat", lon='Long',
                   color="Confirmed", size='Confirmed', animation_frame="Date",
                   color_continuous_scale=px.colors.sequential.Burg)

# Spreading in all China regions
fig = px.scatter_geo(chinaregion, hover_name="Province/State", size_max=20,
                     **geo_options)
fig.update_layout(title='Confirmed in China thru Timeline')
fig.show()

# Spreading in other China regions (Excluding Hubei)
china_without_hubei_rows = chinaregion[chinaregion['Province/State'] != 'Hubei']
fig = px.scatter_geo(china_without_hubei_rows, hover_name="Province/State",
                     size_max=10, **geo_options)
fig.update_layout(title='Confirmed in China (excluding Hubei) thru Timeline')
fig.show()

# Spreading in all regions outside China
fig = px.scatter_geo(nonchinaregion, hover_name="Country/Region", size_max=20,
                     **geo_options)
fig.update_layout(title='Confirmed outside China thru Timeline')
fig.show()
# Spreading in all named regions outside China.
# The two conditions are combined with '&': with '|' every row satisfied at
# least one of them, so nothing was filtered out.
# NOTE(review): hover_name is Province/State, so the second condition may have
# been meant to test that column instead - confirm the intended filter.
region_names = nonchinaregion['Country/Region']
named_regions = nonchinaregion.loc[(region_names != 'Others') & (region_names != 0), :]
fig = px.scatter_geo(named_regions,
                     lat="Lat", lon='Long',
                     color="Confirmed", size='Confirmed', animation_frame="Date",
                     hover_name="Province/State", size_max=30,
                     color_continuous_scale=px.colors.sequential.Burg)
fig.update_layout(title='Confirmed Outside China thru Timeline (only includes named regions)')
fig.show()